The linear regression model is one of the simplest regression models. It assumes a linear relationship between $X$ and $Y$. The output is defined as follows:
$$\hat{y} = WX + b$$
The Advertising data set (from "An Introduction to Statistical Learning", a textbook by Gareth James, Daniela Witten, Trevor Hastie, and Robert Tibshirani) consists of the sales of a product in 200 different markets, along with the advertising budgets for the product in each of those markets for three different media: TV, radio, and newspaper.
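For instance, with hypothetical parameter values $W = 0.05$ and $b = 7$, a market with a TV advertising budget of $X = 100$ would get a predicted sales value of $\hat{y} = 0.05 \cdot 100 + 7 = 12$.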
Objective: determine whether there is an association between advertising and sales. If there is, we can instruct our client to adjust the advertising budgets, thereby indirectly increasing sales.
We want to train an inference model: a series of mathematical expressions applied to our data that depends on a set of parameters. The values of the parameters change through training so that the model learns and adjusts its output.
The training loop is: feed the training data to the model, compute the loss between the predictions and the known targets, and let the optimizer adjust the parameters to reduce that loss. We will repeat this process a number of times, set by the number of training epochs; the learning rate controls the size of each parameter update.
After training we will run an evaluation phase.
In [8]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
# Load the data.
data = pd.read_csv('data/Advertising.csv', index_col=0)
train_X = data[['TV']].values
train_Y = data.Sales.values
train_Y = train_Y[:, np.newaxis]
n_samples = train_X.shape[0]
print(n_samples)
print(train_X.shape, train_Y.shape)
In [9]:
# data visualization
import seaborn as sns
fig, ax = plt.subplots(1, 1)
ax.set_ylabel('Results',
              rotation=0,
              ha='right',  # horizontal alignment
              ma='left')   # multiline alignment
ax.set_xlabel('TV')
fig.set_facecolor('#EAEAF2')
ax.plot(train_X, train_Y, 'o', color=sns.xkcd_rgb['pale red'], alpha=0.6, label='Original data')
plt.show()
In [10]:
import tensorflow as tf
tf.reset_default_graph()
# Training Parameters
learning_rate = 0.1
training_epochs = 100
# Define tf Graph Inputs
X = tf.placeholder("float",[None,1])
y = tf.placeholder("float",[None,1])
# Create Model variables
# Set model weights
W = tf.Variable(np.random.randn(), name="weight")
b = tf.Variable(np.random.randn(), name="bias")
# Construct a linear model
y_pred = tf.add(tf.multiply(X, W), b)
# Minimize the squared errors
cost = tf.reduce_sum(tf.pow(y_pred - y,2))/(2*n_samples) #L2 loss
# Define the optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    cost_plot = []
    # Fit all the training data.
    for epoch in range(training_epochs):
        sess.run(optimizer, feed_dict={X: train_X, y: train_Y})
        cost_plot.append(sess.run(cost, feed_dict={X: train_X, y: train_Y}))
    print("")
    print("Optimization Finished!")
    print("cost=", sess.run(cost, feed_dict={X: train_X, y: train_Y}),
          "W=", sess.run(W), "b=", sess.run(b))
    fig, ax = plt.subplots(1, 1)
    ax.set_ylabel('Results',
                  rotation=0,
                  ha='right',  # horizontal alignment
                  ma='left')   # multiline alignment
    ax.set_xlabel('TV')
    fig.set_facecolor('#EAEAF2')
    ax.plot(train_X, train_Y, 'o',
            color=sns.xkcd_rgb['pale red'],
            alpha=0.6, label='Original data')
    plt.plot(train_X,
             sess.run(W) * train_X + sess.run(b),
             label='Fitted line')
    plt.show()
    x = range(len(cost_plot))
    plt.plot(x, np.sqrt(cost_plot))
    plt.show()
    print(cost_plot[-1])
In [11]:
# your code here
In [12]:
tf.reset_default_graph()
# Parameters
learning_rate = 1e-2
training_epochs = 2000
display_step = 200
data = pd.read_csv('data/Advertising.csv', index_col=0)
train_X = data[['TV','Radio','Newspaper']].values
train_Y = data.Sales.values
train_Y = train_Y[:, np.newaxis]
n_samples = train_X.shape[0]
print(n_samples)
print(train_X.shape, train_Y.shape)
In [13]:
# Define tf Graph Inputs
X = tf.placeholder("float",[None,3])
y = tf.placeholder("float",[None,1])
# Create Model variables
# Set model weights
W = tf.Variable(tf.zeros([3, 1]), name="weight")
b = tf.Variable(np.random.randn(), name="bias")
# Construct a multidimensional linear model
y_pred = tf.matmul(X, W) + b
# Minimize the squared errors
cost = tf.reduce_sum(tf.pow(y_pred-y,2))/(2*n_samples) #L2 loss
# Define the optimizer
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Fit all the training data.
    for epoch in range(training_epochs):
        sess.run(optimizer, feed_dict={X: train_X, y: train_Y})
        # Display logs every display_step epochs.
        if epoch % display_step == 0:
            print("Epoch: ", '%04d' % (epoch+1),
                  "\n cost= ", sess.run(cost, feed_dict={X: train_X, y: train_Y}),
                  "\n W= ", sess.run(W), "\n b= ", sess.run(b), "\n")
    print("Optimization Finished!")
    print("cost= \n", sess.run(cost, feed_dict={X: train_X, y: train_Y}),
          "\n W= \n", sess.run(W), "\n b= \n", sess.run(b))
tf helpers
Updating parameters through many training cycles can be risky (e.g. if your computer loses power, the progress is lost). The tf.train.Saver class can save the graph variables for later reuse.
...
saver = tf.train.Saver()
with tf.Session() as sess:
    for step in range(training_steps):
        sess.run(...)
        if step % 1000 == 0:
            saver.save(sess, 'my-model', global_step=step)
    # evaluation
    saver.save(sess, 'my-model', global_step=training_steps)
If we want to resume training from a certain point, we should use the tf.train.get_checkpoint_state method, which verifies that there is a saved checkpoint, and the tf.train.Saver.restore method to recover the variable values.
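A minimal sketch of how this could look (assuming the checkpoints above were written to the current working directory with the 'my-model' prefix):
import os
import tensorflow as tf

saver = tf.train.Saver()
with tf.Session() as sess:
    # Verify that there is a checkpoint saved in the working directory.
    ckpt = tf.train.get_checkpoint_state(os.getcwd())
    if ckpt and ckpt.model_checkpoint_path:
        # Recover the variable values from the most recent checkpoint.
        saver.restore(sess, ckpt.model_checkpoint_path)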
In [ ]:
import tensorflow as tf
import os
tf.reset_default_graph()
# same parameter and variable initialization as in the linear regression examples above
W = tf.Variable(tf.zeros([5, 1]), name="weights")
b = tf.Variable(0., name="bias")
# your code here: write a function called 'inference' to implement the logistic regression model
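For reference, one possible sketch (an illustration, not necessarily the intended solution): logistic regression applies the sigmoid function to a linear combination of the features, reusing the W and b variables defined above.
def inference(X):
    # Sigmoid over the linear model W*X + b: outputs probabilities in (0, 1).
    return tf.sigmoid(tf.matmul(X, W) + b)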
The cross-entropy loss function is best suited for logistic regression:
$$ L = -\sum_i (y_i \cdot \log (\hat{y}_i) + (1 - y_i) \cdot \log (1 - \hat{y}_i)) $$
In [ ]:
def loss(X, Y):
    # your code here
    pass
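For reference, a minimal sketch that transcribes the formula above directly (it assumes inference(X) returns probabilities in (0, 1); in practice tf.nn.sigmoid_cross_entropy_with_logits, which works on the pre-sigmoid values, is more numerically stable):
def loss(X, Y):
    # Direct transcription of the cross-entropy formula.
    y_hat = inference(X)
    return -tf.reduce_sum(Y * tf.log(y_hat) + (1.0 - Y) * tf.log(1.0 - y_hat))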
Now we are going to read the Titanic survivors dataset. Based on a passenger's age, sex, and ticket class, the model will have to infer whether the passenger survived or not. To compute the inference efficiently, we will read many rows at a time, packed into a single batch tensor.
In [ ]:
def read_csv(batch_size, file_name, record_defaults):
    filename_queue = tf.train.string_input_producer([os.path.join(os.getcwd(), file_name)])
    reader = tf.TextLineReader(skip_header_lines=1)
    key, value = reader.read(filename_queue)
    # decode_csv converts a Tensor of type string (the text line) into a
    # tuple of tensor columns with the specified defaults, which also
    # set the data type for each column.
    decoded = tf.decode_csv(value, record_defaults=record_defaults)
    # shuffle_batch actually reads the file and loads "batch_size" rows in a single tensor.
    return tf.train.shuffle_batch(decoded,
                                  batch_size=batch_size,
                                  capacity=batch_size * 50,
                                  min_after_dequeue=batch_size)
We have categorical features in this dataset (ticket class, gender) and we need to convert them to numbers. To do this, we can convert each categorical feature into $N$ Boolean features, one per possible value, as done for the ticket class in inputs() below. When a categorical feature has only two values, a single binary feature is enough.
In [ ]:
def inputs():
    passenger_id, survived, pclass, name, sex, age, sibsp, parch, ticket, fare, cabin, embarked = \
        read_csv(100, "data/train_titanic.csv",
                 [[0.0], [0.0], [0], [""], [""], [0.0], [0.0], [0.0], [""], [0.0], [""], [""]])
    # Convert the categorical data.
    is_first_class = tf.to_float(tf.equal(pclass, [1]))
    is_second_class = tf.to_float(tf.equal(pclass, [2]))
    is_third_class = tf.to_float(tf.equal(pclass, [3]))
    gender = tf.to_float(tf.equal(sex, ["female"]))
    # Finally we pack all the features in a single matrix;
    # we then transpose to have a matrix with one example per row and one feature per column.
    features = tf.transpose(tf.stack([is_first_class, is_second_class, is_third_class, gender, age]))
    survived = tf.reshape(survived, [100, 1])
    return features, survived

def train(total_loss):
    learning_rate = 0.01
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)

def evaluate(sess, X, Y):
    predicted = tf.cast(inference(X) > 0.5, tf.float32)
    print(sess.run(tf.reduce_mean(tf.cast(tf.equal(predicted, Y), tf.float32))))

# Launch the graph in a session, setup boilerplate.
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    X, Y = inputs()
    total_loss = loss(X, Y)
    train_op = train(total_loss)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # Actual training loop.
    training_steps = 5000
    for step in range(training_steps):
        sess.run([train_op])
        # For debugging and learning purposes, see how the loss decreases through the training steps.
        if step % 100 == 0:
            print("loss: ", sess.run([total_loss]))
    evaluate(sess, X, Y)
    import time
    time.sleep(5)
    coord.request_stop()
    coord.join(threads)
In [21]:
# Import MNIST data
# The MNIST data is split into three parts: 55,000 data points of training data (mnist.train),
# 10,000 points of test data (mnist.test), and 5,000 points of validation data (mnist.validation).
# Both the training set and test set contain images and their corresponding labels; for example the
# training images are mnist.train.images and the training labels are mnist.train.labels.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from matplotlib import pyplot as plt
import numpy as np
%matplotlib inline
mnist = input_data.read_data_sets("data/", one_hot=True)
fig, ax = plt.subplots(figsize=(2, 2))
plt.imshow(mnist.train.images[0].reshape((28, 28)), interpolation='nearest', cmap='gray')
plt.show()
print "Class: ", np.argmax(mnist.train.labels[0])
In [15]:
# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1
# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])
In [16]:
# Create model
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with ReLU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with ReLU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer
In [17]:
# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
In [18]:
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run the optimization op (backprop) and the cost op (to get the loss value)
            _, c = sess.run([optimizer, cost],
                            feed_dict={x: batch_x, y: batch_y})
            # Compute the average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.9f}".format(avg_cost))
    print("Optimization Finished!")
    # Test the model
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))